home *** CD-ROM | disk | FTP | other *** search
- {*********************************************************}
- {* AABWT *}
- {* Copyright (c) Julian M Bucknall 2001 *}
- {* All rights reserved. *}
- {*********************************************************}
- {* Algorithms Alfresco: Burrows-Wheeler compression *}
- {*********************************************************}
-
- {Note: this unit is released as freeware. In other words, you are free
- to use this unit in your own applications, however I retain all
- copyright to the code. JMB}
-
- unit AABWT;
-
- interface
-
- uses
- SysUtils,
- Classes;
-
- procedure AABWTCompress(aInStream, aOutStream : TStream);
-
- procedure AABWTUncompress(aInStream, aOutStream : TStream);
-
- implementation
-
- uses
- AAHuffmn;
-
- type
- PIntArray = ^TIntArray;
- TIntArray = array [0..pred(MaxInt div sizeof(integer))] of integer;
-
- {===Quicksort for BWT================================================}
- const
- QSCutOff = 15;
- {--------}
- function CompareBlocks(aData1, aData2 : pointer;
- aSize : integer) : integer;
- var
- Data1 : PChar;
- Data2 : PChar;
- i : integer;
- begin
- Data1 := aData1;
- Data2 := aData2;
- i := aSize;
- while (i > 0) and (Data1^ = Data2^) do begin
- dec(i);
- inc(Data1);
- inc(Data2);
- end;
- if (i = 0) then
- Result := 0
- else if (Data1^ < Data2^) then
- Result := -1
- else
- Result := +1;
- end;
- {--------}
- procedure QSInsertionSort(aList : PPointerList;
- aFirst : integer;
- aLast : integer;
- aSize : integer);
- var
- i, j : integer;
- IndexOfMin : integer;
- Temp : pointer;
- begin
- {find the smallest element in the first QSCutOff items and put it in
- the first position}
- IndexOfMin := aFirst;
- j := QSCutOff;
- if (j > aLast) then
- j := aLast;
- for i := succ(aFirst) to j do
- if (CompareBlocks(aList^[i], aList^[IndexOfMin], aSize) < 0) then
- IndexOfMin := i;
- if (aFirst <> IndexOfMin) then begin
- Temp := aList^[aFirst];
- aList^[aFirst] := aList^[IndexOfMin];
- aList^[IndexOfMin] := Temp;
- end;
- {now sort via fast insertion method}
- for i := aFirst+2 to aLast do begin
- Temp := aList^[i];
- j := i;
- while (CompareBlocks(Temp, aList^[j-1], aSize) < 0) do begin
- aList^[j] := aList^[j-1];
- dec(j);
- end;
- aList^[j] := Temp;
- end;
- end;
- {--------}
- procedure QS(aList : PPointerList;
- aFirst : integer;
- aLast : integer;
- aSize : integer);
- var
- L, R : integer;
- Pivot : pointer;
- Temp : pointer;
- begin
- while ((aLast - aFirst) > QSCutOff) do begin
- {sort the first, middle and last items, then set the pivot to the
- middle one - the median-of-3 method}
- R := (aFirst + aLast) div 2;
- if (CompareBlocks(aList^[aFirst], aList^[R], aSize) > 0) then begin
- Temp := aList^[aFirst];
- aList^[aFirst] := aList^[R];
- aList^[R] := Temp;
- end;
- if (CompareBlocks(aList^[aFirst], aList^[aLast], aSize) > 0) then begin
- Temp := aList^[aFirst];
- aList^[aFirst] := aList^[aLast];
- aList^[aLast] := Temp;
- end;
- if (CompareBlocks(aList^[R], aList^[aLast], aSize) > 0) then begin
- Temp := aList^[R];
- aList^[R] := aList^[aLast];
- aList^[aLast] := Temp;
- end;
- Pivot := aList^[R];
- {set indexes and partition}
- L := aFirst;
- R := aLast;
- while true do begin
- repeat dec(R); until (CompareBlocks(aList^[R], Pivot, aSize) <= 0);
- repeat inc(L); until (CompareBlocks(aList^[L], Pivot, aSize) >= 0);
- if (L >= R) then Break;
- Temp := aList^[L];
- aList^[L] := aList^[R];
- aList^[R] := Temp;
- end;
- {quicksort the first subfile}
- QS(aList, aFirst, R, aSize);
- {quicksort the second subfile - recursion removal}
- aFirst := succ(R);
- end;
- end;
- {--------}
- procedure Quicksort(aList : PPointerList;
- aFirst : integer;
- aLast : integer;
- aSize : integer);
- begin
- QS(aList, aFirst, aLast, aSize);
- QSInsertionSort(aList, aFirst, aLast, aSize);
- end;
- {====================================================================}
-
-
- {===Distribution sort for BWT========================================}
- procedure DistSort(aInBlock, aOutBlock : PChar; aSize : integer;
- aStartPos : PIntArray);
- var
- i, j : integer;
- Counter : array [0..255] of longint;
- CumulCount : integer;
- begin
- {clear the counter array}
- FillChar(Counter, sizeof(Counter), 0);
-
- {calculate the distribution of each key}
- for i := 0 to pred(aSize) do begin
- inc(Counter[ord(aInBlock^)]);
- inc(aInBlock);
- end;
-
- {copy over the byte values to the auxiliary list in sorted order,
- generating the start positions for each character as we go}
- CumulCount := 0;
- for i := 0 to 255 do begin
- aStartPos^[i] := CumulCount;
- inc(CumulCount, Counter[i]);
- for j := 0 to pred(Counter[i]) do begin
- aOutBlock^ := char(i);
- inc(aOutBlock);
- end;
- end;
- end;
- {====================================================================}
-
-
- {====================================================================}
- function BinarySearch(aList : PPointerList;
- aCount : integer;
- aPtr : pointer) : integer;
- var
- L, R, M : integer;
- CompareResult : integer;
- begin
- L := 0;
- R := pred(aCount);
- while (L <= R) do begin
- M := (L + R) div 2;
- CompareResult := CompareBlocks(aPtr, aList^[M], aCount);
- if (CompareResult < 0) then
- R := M - 1
- else if (CompareResult > 0) then
- L := M + 1
- else begin
- Result := M;
- Exit;
- end;
- end;
- Assert(false,
- 'BinarySearch: the pointer should be in the list');
- Result := 0;
- end;
- {====================================================================}
-
-
- {====================================================================}
- function ApplyBWTransform(aInBlock : PChar;
- aOutBlock : PChar;
- aSize : integer) : integer;
- var
- i : integer;
- DataBlock : PChar;
- PtrList : PPointerList;
- TempPtr : PChar;
- begin
- {guard against dumb programming mistakes}
- Assert(aInBlock <> nil,
- 'ApplyBWTransform: input block cannot be nil');
- Assert(aOutBlock <> nil,
- 'ApplyBWTransform: output block cannot be nil');
- Assert(aSize > 0,
- 'ApplyBWTransform: block size must be positive');
-
- {prepare for the try..finally}
- DataBlock := nil;
- PtrList := nil;
- try
-
- {allocate the data block and fill it with two copies of the input
- block}
- GetMem(DataBlock, aSize * 2);
- Move(aInBlock^, DataBlock^, aSize);
- Move(aInBlock^, DataBlock[aSize], aSize);
-
- {allocate the list of pointers and set the elements to the
- individual characters in the data block: these will be our
- rotations of the block}
- GetMem(PtrList, aSize * sizeof(pointer));
- TempPtr := DataBlock;
- for i := 0 to pred(aSize) do begin
- PtrList^[i] := TempPtr;
- inc(TempPtr);
- end;
-
- {sort the pointer list}
- Quicksort(PtrList, 0, pred(aSize), aSize);
-
- {calculate the output block}
- for i := 0 to pred(aSize) do
- aOutBlock[i] := PChar(PtrList^[i])[pred(aSize)];
-
- {find the original block in the list}
- Result := BinarySearch(PtrList, aSize, DataBlock);
-
- finally
- FreeMem(DataBlock);
- FreeMem(PtrList);
- end;
- end;
- {--------}
- procedure MoveToFrontDecode(aInBlock : PChar;
- aOutBlock : PChar;
- aSize : integer);
- var
- i, j, k : integer;
- Decoder : array [0..255] of char;
-
- begin
- {initialize the encoder array}
- for i := 0 to 255 do
- Decoder[i] := char(i);
-
- {for all the bytes in the input block...}
- for i := 0 to pred(aSize) do begin
-
- {output the character at that position in the decoder array}
- j := ord(aInBlock^);
- aOUtBlock^ := Decoder[j];
-
- {move the character to the front of the decoder array}
- if (j > 0) then
- for k := j downto 1 do
- Decoder[k] := Decoder[k-1];
- Decoder[0] := aOutBlock^;
-
- {advance the input and output pointers}
- inc(aInBlock);
- inc(aOutBlock);
- end;
- end;
- {--------}
- procedure MoveToFrontEncode(aInBlock : PChar;
- aOutBlock : PChar;
- aSize : integer);
- var
- i, j, k : integer;
- Encoder : array [0..255] of char;
-
- begin
- {initialize the encoder array}
- for i := 0 to 255 do
- Encoder[i] := char(i);
-
- {for all the characters in the input block...}
- for i := 0 to pred(aSize) do begin
-
- {find it in the encoder array}
- for j := 0 to 255 do
- if (Encoder[j] = aInBlock^) then
- Break;
-
- {output the position}
- aOUtBlock^ := char(j);
-
- {move the character to the front of the encoder array}
- if (j > 0) then
- for k := j downto 1 do
- Encoder[k] := Encoder[k-1];
- Encoder[0] := aInBlock^;
-
- {advance the input and output pointers}
- inc(aInBlock);
- inc(aOutBlock);
- end;
- end;
- {--------}
- procedure UnapplyBWTransform(aInBlock : PChar;
- aOutBlock : PChar;
- aSize : integer;
- aIndex : integer);
- var
- i, j : integer;
- FirstCol : PChar;
- Temp : PChar;
- TransVector : PIntArray;
- StartPos : PIntArray;
- begin
- {prepare for the try..finally}
- FirstCol := nil;
- TransVector := nil;
- StartPos := nil;
-
- try
-
- {allocate the first column buffer and the transformation vector}
- GetMem(FirstCol, aSize);
- GetMem(TransVector, aSize * sizeof(integer));
- GetMem(StartPos, 256 * sizeof(integer));
-
- {sort the input block using distribution sort}
- DistSort(aInBlock, FirstCol, aSize, StartPos);
-
- {for each character in the unsorted block...}
- Temp := aInBlock;
- for i := 0 to pred(aSize) do begin
-
- {find the next occurrence of this character in the sorted block}
- j := StartPos[ord(Temp^)];
- inc(StartPos[ord(Temp^)]);
-
- {set the entry in the transformation vector}
- TransVector[j] := i;
-
- {advance to the next character in the unsorted block}
- inc(Temp);
- end;
-
- {we now have the transformation vector, so recreate the original
- data starting at the passed in index}
- j := aIndex;
- Temp := aOutBlock;
- for i := 0 to pred(aSize) do begin
- Temp^ := FirstCol[j];
- j := TransVector[j];
- inc(Temp);
- end;
-
- finally
- FreeMem(StartPos);
- FreeMem(TransVector);
- FreeMem(FirstCol);
- end;
- end;
- {====================================================================}
-
-
- {====================================================================}
- const
- BWTSignature = $57424141;
- {--------}
- procedure AABWTCompress(aInStream, aOutStream : TStream);
- const
- BufSize = 16*1024;
- var
- InBuf : PChar;
- BWTBuf : PChar;
- MTFBuf : PChar;
- BytesRead : integer;
- LongBuf : longint;
- WordBuf : word;
- Index : integer;
- begin
- {prepare for the try..finally}
- InBuf := nil;
- BWTBuf := nil;
- MTFBuf := nil;
-
- try
-
- {allocate the buffers}
- GetMem(InBuf, BufSize);
- GetMem(BWTBuf, BufSize);
- GetMem(MTFBuf, BufSize);
-
- {write the header information to the output stream}
- LongBuf := BWTSignature;
- aOutStream.WriteBuffer(LongBuf, sizeof(LongBuf));
- LongBuf := aInStream.Size;
- aOutStream.WriteBuffer(LongBuf, sizeof(LongBuf));
- WordBuf := BufSize;
- aOutStream.WriteBuffer(WordBuf, sizeof(WordBuf));
-
- {read the first buffer}
- BytesRead := aInStream.Read(InBuf^, BufSize);
-
- {while there is data to compress...}
- while (BytesRead <> 0) do begin
-
- {apply the BWT transform to this buffer}
- Index := ApplyBWTransform(InBuf, BWTBuf, BytesRead);
-
- {write the index to the output stream}
- WordBuf := Index;
- aOutStream.WriteBuffer(WordBuf, sizeof(WordBuf));
-
- {encode the BWT buffer with the Move-To-Front algorithm}
- MoveToFrontEncode(BWTBuf, MTFBuf, BytesRead);
-
- {compress the MTF buffer with Huffman}
- HuffmanCompressBlock(MTFBuf^, BytesRead, aOutStream);
-
- {read the next bufferful}
- BytesRead := aInStream.Read(InBuf^, BufSize);
-
- end;
-
- finally
- FreeMem(MTFBuf);
- FreeMem(BWTBuf);
- FreeMem(InBuf);
- end;
- end;
- {====================================================================}
- procedure AABWTUncompress(aInStream, aOutStream : TStream);
- var
- OutBuf : PChar;
- BWTBuf : PChar;
- MTFBuf : PChar;
- BytesRead : integer;
- LongBuf : longint;
- WordBuf : word;
- Index : integer;
- Size : longint;
- BufSize : integer;
- BytesToRead : integer;
- begin
- {prepare for the try..finally}
- OutBuf := nil;
- BWTBuf := nil;
- MTFBuf := nil;
-
- try
-
- {read the header information from the input stream}
- BytesRead := aInStream.Read(LongBuf, sizeof(LongBuf));
- if (BytesRead <> sizeof(LongBuf)) or
- (LongBuf <> BWTSignature) then
- raise Exception.Create(
- 'AABWTUncompress: input stream is not a BWT compressed stream');
- aInStream.ReadBuffer(Size, sizeof(Size));
- aInStream.ReadBuffer(WordBuf, sizeof(WordBuf));
- BufSize := WordBuf;
-
- {allocate the buffers}
- GetMem(OutBuf, BufSize);
- GetMem(BWTBuf, BufSize);
- GetMem(MTFBuf, BufSize);
-
- {while there is still data to uncompress...}
- while (Size <> 0) do begin
-
- {read the index for the next buffer}
- aInStream.ReadBuffer(WordBuf, sizeof(WordBuf));
- Index := WordBuf;
-
- {read and decompress the next block}
- BytesToRead := Size;
- if (BytesToRead > BufSize) then
- BytesToRead := BufSize;
- HuffmanDecompressBlock(aInStream, MTFBuf^, BytesToRead);
-
- {decode using the Move-To-Front algorithm}
- MoveToFrontDecode(MTFBuf, BWTBuf, BytesToRead);
-
- {perform the reverse BWT transform}
- UnapplyBWTransform(BWTBuf, OutBuf, BytesToRead, Index);
-
- {write out the decompressed buffer}
- aOutStream.WriteBuffer(OutBuf^, BytesTORead);
-
- {update the loop variable}
- dec(Size, BytesTORead);
- end;
-
- finally
- FreeMem(MTFBuf);
- FreeMem(BWTBuf);
- FreeMem(OutBuf);
- end;
- end;
- {====================================================================}
-
- end.
-